

// author: Lucas
//
// Session setup for the subset analysis: start from an empty dataset, open
// the log, run the helper do-file, and set the income threshold used for the
// high-income flag further down.

clear
set more off

capture log close
log using "$logdir/analysis/subsets.log", replace

// Name of the income variable.
// NOTE(review): not referenced anywhere else in the visible part of this file.
local incvar soi_agi

// Helper do-file; presumably defines the get_bad_dummies command that is
// called inside the main loop -- confirm against that file.
do "$adir/get_bad_dummies"

// Income cutoff: observations with income strictly above this are flagged
// as high income.
local inc_thr 100000

	

forval knowl = 0/1 {
	// ------------------------------------------------------------------
	// Upper bound
	//
	// Load the filer sample, build the bad_* dummies, and define the
	// upper-bound success indicator ub as the complement of bad_any.
	// ------------------------------------------------------------------

	// Only observations with nonfiler equal to 0 are read in.
	use if nonfiler == 0 using "$statadir/soi_cdw_taxsim_2019", clear

	// Command expected to come from the helper do-file run during setup;
	// it must create bad_any (used on the next line).
	get_bad_dummies
	generate ub = 1 - bad_any

	display "Done bad dummies"
	// ------------------------------------------------------------------
	// Lower bound
	//
	// lb1 and lb2 equal 1 when the absolute gap between the SOI federal
	// income tax and the TAXSIM federal income tax is under 100, and 0
	// otherwise, including when the gap is missing. lb is a copy of lb1.
	//
	// NOTE(review): both passes subtract taxsim1_fed_incm_tax, so delta1
	// and delta2 (and lb1 and lb2) are identical. If a second TAXSIM run
	// was meant for the second pass, the variable name needs the loop
	// index -- confirm before changing.
	// ------------------------------------------------------------------

	foreach i of numlist 1 2 {
		quietly generate double delta`i' = soi_fed_incm_tax - taxsim1_fed_incm_tax
		// the missing() guard makes the treatment of missing gaps explicit
		generate lb`i' = abs(delta`i') < 100 & !missing(delta`i')
	}
	generate lb = lb1

	display "Done lower bound"
	*****************************************
	*										*
	*			Define the subsets			*
	*										*
	*****************************************
	// Build the indicator variables that the minrow cascade below uses to
	// place each return in a subset. Every has_* variable, hi_inc, single
	// and nodeps is a 0/1 flag.
	//
	//   knowl == 1 : flags come from current-year SOI fields (soi_*)
	//   knowl == 0 : flags come from the cdw_* fields, mostly lagged
	//
	// Reminder: in Stata a missing value compares as larger than any
	// number, so an expression like x > 0 is true when x is missing.
	if `knowl' == 1 {
		// maybe could do this a little better, since Sch 3 nonrefundable includes AOTC (nonrefundable part)
		gen has_bens = soi_sch3_nonrefndbl_credits > 0 | ///
			soi_sch3_payments_etc > soi_excess_sstax_withheld + soi_estmtd_tx_pymnts + soi_tx_paid_w_extnsn + 10
		qui replace has_bens = 1 if bad_itemizer | bad_aboveline

		gen has_int_div = soi_total_divs + soi_interest_txbl > 0
		gen has_ret = soi_ira_dist_txbl + soi_pens_anns_txbl > 0
		gen has_ss = soi_soc_sec > 0
		// any nonzero net amount counts here, losses included
		gen has_cg = soi_sched_d_capgains_long + soi_sched_d_capgains_short ~= 0

		// Fixed: this used to store the dollar amount itself because the
		// comparison with zero was missing. It is now a 0/1 flag like the
		// other has_* variables and like the knowl == 0 definition.
		gen has_allowed_sch1 = soi_ui + soi_txbl_refunds_etc > 0
		// 10 is a tolerance on the residual Schedule 1 amount
		gen has_disallowed_sch1 = abs(soi_sched_1_incm - soi_ui - soi_txbl_refunds_etc) > 10

		gen hi_inc = soi_agi > `inc_thr'

		// marital status and dependents
		gen single = inlist(soi_fil_stat,1,4) // & !missing(taxsim1_fed_incm_tax)
		gen nodeps = soi_deps_tot == 0
	}
	else {
		gen has_bens = cdw_lag_inelig_ben | cdw_lag_itemizer
		gen has_int_div = cdw_lag_intincm + cdw_lag_divincm > 0
		gen has_ret = cdw_lag_gr_pens > 0 // this includes IRAs, despite the name
		gen has_ss = cdw_lag_all_ss > 0
		// NOTE(review): only strictly positive amounts count here, while
		// the knowl == 1 branch counts any nonzero amount -- confirm that
		// the asymmetry is intended.
		gen has_cg = cdw_lag_cg > 0
		gen has_allowed_sch1 = cdw_lag_ui_inc + cdw_lag_state_ref > 0

		//  will need to change this if lag year is not 2018.
		gen has_disallowed_sch1 = abs(cdw_lag_sch1_incm - cdw_lag_ui_inc - cdw_lag_state_ref - max(-3000,cdw_lag_cg)) > 10

		gen hi_inc = cdw_lag_agi > `inc_thr' // not bothering with inflation adjustment

		// marital status and dependents
		gen single = inlist(cdw_filing_status,1,4)
		// a missing dependent count is treated as no dependents
		gen nodeps = cdw_depx == 0 | missing(cdw_depx)
	}

	// minrow is the narrowest table row (1 = narrowest, 11 = broadest) whose
	// subset contains the observation. Everyone starts in row 11 and moves
	// down one row at a time for as long as the next restriction holds; the
	// first restriction that fails stops the descent.
	//
	// Restrictions in order, for the moves 11 to 10, 10 to 9, and so on
	// down to 2 to 1.
	quietly generate minrow = 11
	local row = 11
	foreach cond in "has_bens == 0" "has_disallowed_sch1 == 0" "hi_inc == 0" ///
		"has_cg == 0" "has_allowed_sch1 == 0" "has_ret == 0" "has_ss == 0" ///
		"has_int_div == 0" "nodeps" "single" {
		local next = `row' - 1
		quietly replace minrow = `next' if minrow == `row' & `cond'
		local row = `next'
	}
	
	
	
	*****
	// Diagnostic: among returns in the two narrowest rows that fail the
	// upper bound (ub == 0), how often is each bad_* flag set, and how often
	// is it the only flag set?

	local badlist sched_c deps itemizer fil_stat rents_royal txbl_pens depcare cg pt t_unkn ///
		eitc_deps aboveline qbi_ded educ res_energ sched_f noncap_gains alimony early_roth wages int qual_divs

	// only_X starts as a copy of bad_X and is zeroed whenever any other
	// flag in the list equals 1.
	foreach k of local badlist {
		local others : list badlist - k
		generate only_`k' = bad_`k'
		foreach kk of local others {
			quietly replace only_`k' = 0 if bad_`kk' == 1
		}
	}

	// Weighted summaries for rows 1 and 2, with two blank lines in between.
	local ord1 first
	local ord2 second
	forvalues r = 1/2 {
		if `r' > 1 {
			display ""
			display ""
		}
		display "For knowl = `knowl', `ord`r'' row failures: "
		summarize bad_* if minrow == `r' & ub == 0 [aw=soi_wgt]
		summarize only_* if minrow == `r' & ub == 0 [aw=soi_wgt]
	}

	// Sum of the weights over the whole sample; denominator for the
	// population-share column of the table.
	quietly summarize soi_wgt
	local tot_wgt = r(sum)

	// Snapshot of the prepared data, reloaded by the later sections.
	tempfile touse
	quietly save `touse', replace

	display "Done subsets"


	*********************************************
	*											*
	*				Write the table				*
	*											*
	*********************************************

	// Row labels for the 11 table rows; str1 to str11 are looked up by row
	// number when the rows are written. The text is LaTeX and is written
	// to the output file as is.
	// str1 is built in two steps; the second line appends to the first.
	// NOTE(review): the 100k figure in str1 is hard-coded text and will not
	// follow a change to the inc_thr local.
	local str1 " 1. \textbf{Narrowest:} Single, no dependents,  \\ "
	local str1 " `str1' \quad\quad only wages, \\ \quad\quad  no unobs. credits/deductions, \\ \quad \quad income under \\\$100k"
	local str2 " 2. Allow married "
	local str3 " 3. Allow dependents"
	
	local str4 "4. Add interest/dividends"
	local str5 "5. Add Social Security"
	local str6 "6. Add pension/IRA distributions"
	local str7 "7. Add gambling, UI, state tax refunds"
	local str8 "8. Add capital gains"
	local str9 "9. Add high income "
	local str10 " 10. All income types"
	local str11 "11. \textbf{Broadest}: Eliminate deduction \\ \quad \quad and credit restrictions"



	// Open the output file, one per value of knowl. A handle left open by
	// an earlier interrupted run is closed first; capture hides the error
	// when there is none.
	cap file close myfile
	qui file open myfile using "$outdir/tables/subsets_`knowl'.tex", write replace

	// Six-column header: row label, population share, then cumulative and
	// marginal success rates, each split into the tax-liability and the
	// item-based approach. No newline is written, so the whole table ends
	// up on a single line of the output file.
	file write myfile " & Cumulative & \multicolumn{2}{c}{Cumulative} & \multicolumn{2}{c}{Marginal} \\ "
	file write myfile " & share of  & \multicolumn{2}{c}{success rate} & \multicolumn{2}{c}{success rate} \\ "
	file write myfile " \cline{3-4} \cline{5-6} "
	file write myfile " & population		& Tax-				& Item-			& Tax- 			& Item- \\ "
	file write myfile " & 					& liability 		& 	based 		& liability 	& based 	 \\ "
	file write myfile " & 					& approach 			& approach 		& approach 		& approach \\ "
	file write myfile " & (1) & (2) & (3) & (4) & (5) \\ \hline "

	
	// Write one table line per row 1 to 11.
	//
	// Nothing inside the loop changes the data, so the snapshot is loaded
	// once here instead of once per row as before.
	use `touse', clear

	forval row = 1/11 {

		file write myfile "`str`row''"

		// column 1: share of total sample weight in rows 1 through row
		qui sum soi_wgt if minrow <= `row'
		local e1 : di %6.2f `=r(sum)/`tot_wgt''

		// columns 2 and 3: cumulative weighted mean of lb and ub,
		// over rows 1 through row
		foreach k in lb ub {
			qui sum `k' if minrow <= `row'  [aw=soi_wgt]
			local e`k'1 : di %6.2f r(mean)
		}

		// columns 4 and 5: marginal weighted mean of lb and ub,
		// over this row only
		foreach k in lb ub {
			qui sum `k' if minrow == `row' [aw=soi_wgt]
			local e`k'2 : di %6.2f r(mean)
		}

		file write myfile " & `e1' & `elb1' & `eub1' & `elb2' & `eub2' \\ "

		// extra vertical space between rows, but not after the last one
		if `row' < 11 {
			file write myfile " \addlinespace[3pt] "
		}

		// echo the same numbers to the log
		di "Done `row' for knowl = `knowl'"
		di "& `e1' & `elb1' & `eub1' & `elb2' & `eub2' "

	}


	cap file close myfile


	// ------------------------------------------------------------------
	// Some descriptives
	//
	// For returns that fail the upper bound, list the weighted mean of
	// every bad_* variable within each minrow, largest first.
	// ------------------------------------------------------------------

	use `touse', clear
	quietly drop if ub != 0

	// The three extra variables get the bad_ prefix only so that they are
	// picked up by the collapse and the reshape below.
	// The codes 2 and 3 are presumably married filing jointly and married
	// filing separately, going by the variable names -- confirm.
	generate bad_mfj_to_mfs = cdw_filing_status == 2 & soi_fil_stat == 3
	generate bad_mfs_to_mfj = cdw_filing_status == 3 & soi_fil_stat == 2
	// Birth years below 1900 are raised to 1900. max() ignores missing
	// arguments, so a missing birth year is also treated as 1900.
	// NOTE(review): 2018 is hard-coded while the data file is named 2019;
	// confirm which reference year is intended.
	generate bad_age = 2018 - max(soi_prim_yob, 1900)

	// Weighted means by row, then one line per row and variable.
	gcollapse (mean) bad_* [aw=soi_wgt], by(minrow) fast
	quietly reshape long bad_, i(minrow) j(badtype) string

	// ascending in minrow, descending in the mean within each row
	gsort minrow -bad_

	display "For knowledge = `knowl'"
	list



} // end loop over IRS knowledge

cap log close


